library(tidyverse)
## Warning: package 'tidyverse' was built under R version 3.2.5
## Loading tidyverse: ggplot2
## Loading tidyverse: tibble
## Loading tidyverse: tidyr
## Loading tidyverse: readr
## Loading tidyverse: purrr
## Loading tidyverse: dplyr
## Warning: package 'ggplot2' was built under R version 3.2.4
## Warning: package 'tibble' was built under R version 3.2.5
## Warning: package 'tidyr' was built under R version 3.2.5
## Warning: package 'readr' was built under R version 3.2.5
## Warning: package 'purrr' was built under R version 3.2.5
## Warning: package 'dplyr' was built under R version 3.2.5
## Conflicts with tidy packages ----------------------------------------------
## filter(): dplyr, stats
## lag():    dplyr, stats
# First look at mpg: highway mileage (hwy) plotted against engine
# displacement (displ), one point per car.
ggplot(mpg, aes(x = displ, y = hwy)) +
  geom_point()

## #### ##
## EGGS ##
## #### ##

# ggplot() with data but no layers: nothing is drawn on the panel.
ggplot(mpg)

# Dimensions of mtcars: number of rows, then number of columns.
nrow(mtcars)
## [1] 32
ncol(mtcars)
## [1] 11
# Open the help page for the mpg data set.
?mpg
# hwy against cyl.
ggplot(mpg, aes(x = cyl, y = hwy)) +
  geom_point()

# class against drv.
ggplot(mpg, aes(x = drv, y = class)) +
  geom_point()

## ## ## ## ## ## ## ## 
## Aesthetic Mapping ## 
## ## ## ## ## ## ## ## 

# Question: are the outlying cars (higher-than-average mileage despite a
# relatively large displacement) subcompact vehicles?

# Map class to the levels of an aesthetic property (size, colour, shape, ...)
# so that each vehicle class can be told apart on the plot.
ggplot(mpg, aes(x = displ, y = hwy, colour = class)) +
  geom_point()

## #### ##
## EGGS ##
## #### ##

# Colour given inside aes(): the points do not come out blue. It is as if
# colour = x had been written, with x <- "blue" being a data value.
ggplot(mpg) +
  geom_point(aes(x = displ, y = hwy, colour = "blue"))

# Instead, pass the colour as a plain layer argument, outside aes():
ggplot(mpg) +
  geom_point(aes(x = displ, y = hwy), colour = "blue")

# Mapping a continuous variable to colour, size and shape; then a
# categorical one. str() shows which columns are numeric and which are
# character.
str(mpg)
## Classes 'tbl_df', 'tbl' and 'data.frame':    234 obs. of  11 variables:
##  $ manufacturer: chr  "audi" "audi" "audi" "audi" ...
##  $ model       : chr  "a4" "a4" "a4" "a4" ...
##  $ displ       : num  1.8 1.8 2 2 2.8 2.8 3.1 1.8 1.8 2 ...
##  $ year        : int  1999 1999 2008 2008 1999 1999 2008 1999 1999 2008 ...
##  $ cyl         : int  4 4 4 4 6 6 6 4 4 4 ...
##  $ trans       : chr  "auto(l5)" "manual(m5)" "manual(m6)" "auto(av)" ...
##  $ drv         : chr  "f" "f" "f" "f" ...
##  $ cty         : int  18 21 20 21 16 18 18 18 16 20 ...
##  $ hwy         : int  29 29 31 30 26 26 27 26 25 28 ...
##  $ fl          : chr  "p" "p" "p" "p" ...
##  $ class       : chr  "compact" "compact" "compact" "compact" ...
ggplot(mpg, aes(x = displ, y = hwy, colour = class)) +
  geom_point()

# For continuous variables:
# Colour -> gradient
# Size   -> selects a couple of sizes
# Shape  -> Error: A continuous variable cannot be mapped to shape.
# For categorical variables, everything works as expected.

# Same variable mapped to multiple aesthetics:
ggplot(mpg, aes(x = displ, y = hwy)) +
  geom_point(aes(colour = class, size = class))
## Warning: Using size for a discrete variable is not advised.

# Works as expected; the legend is changed accordingly too.

# Stroke aesthetic: for shapes that have a border, the outline can be
# styled separately from the inside of the point.
ggplot(mpg, aes(x = displ, y = hwy)) +
  geom_point(shape = 21, stroke = 5)

# Mapping an aesthetic to an expression instead of a plain variable:
ggplot(mpg, aes(x = displ, y = hwy, colour = displ < 5)) +
  geom_point() # Like magic

## ## ## ### 
## FACETS ## 
## ## ## ###

# Facets split the data on a categorical variable, with each subplot
# displaying one subset of the data.
# The first argument of facet_wrap() is an R formula object; the variable in
# it should be categorical.
ggplot(mpg, aes(x = displ, y = hwy)) +
  geom_point() +
  facet_wrap(~ class, nrow = 2)

# To facet on a combination of two variables, use facet_grid().
ggplot(mpg, aes(x = displ, y = hwy)) +
  geom_point() +
  facet_grid(drv ~ cyl)

# Same grid with the two variables swapped (kept for reference, not run):
# ggplot(mpg) +
#   geom_point(aes(x = displ, y = hwy)) +
#   facet_grid(cyl ~ drv)

# Use "." in the formula to leave the rows or the columns unfaceted.
ggplot(mpg, aes(x = displ, y = hwy)) +
  geom_point() +
  facet_grid(cyl ~ .)

## #### ## 
## EGGS ## 
## #### ## 

# Try faceting on a continuous variable.
# Result: lots of panels - it seems each distinct value is taken as a
# factor level.
ggplot(mpg, aes(x = displ, y = hwy)) +
  geom_point() +
  facet_wrap(~ hwy)

ggplot(mpg, aes(x = drv, y = cyl)) +
  geom_point()

# Compare this with the empty cells of facet_grid(drv ~ cyl):
# an empty cell there = a missing point in the plot above.
# Cells whose drv/cyl combination does exist give a more in-depth view of
# the displ and hwy values.

## What does this do?
ggplot(mpg, aes(x = displ, y = hwy)) +
  geom_point() +
  facet_grid(drv ~ .) # Panels laid out in one column only

ggplot(mpg, aes(x = displ, y = hwy)) +
  geom_point() +
  facet_grid(. ~ cyl) # Panels laid out in one row only

## How about
ggplot(mpg, aes(x = displ, y = hwy)) +
  geom_point() +
  facet_grid(~ cyl) # Same as the above

## Advantages and drawbacks of faceting - compare with the first facet plot
ggplot(mpg, aes(x = displ, y = hwy, colour = class)) +
  geom_point()

# Pro: easier separation of the data (useful with a lot of data points);
#      finer details are visible for every group.
# Con (and the advantage of using an aesthetic instead): harder to see how
#      the different groups 'interact'.

## ## ## ## ## ## 
## Geom Object ## 
## ## ## ## ## ## 

# Two geoms on one plot: points and a smoother, both split by drv.
# The smoother additionally varies its linetype by drv.
ggplot(mpg) +
  geom_point(aes(x = displ, y = hwy, colour = drv)) +
  geom_smooth(aes(x = displ, y = hwy, colour = drv, linetype = drv))

# OR: put the shared mappings in ggplot() so every layer inherits them.
ggplot(mpg, aes(x = displ, y = hwy)) +
  geom_point(aes(colour = class)) +
  geom_smooth() # Neat

# The same idea can be used to give a layer its own data:
# here the smoother only sees the subcompact cars.
ggplot(mpg, aes(x = displ, y = hwy)) +
  geom_point(aes(colour = class)) +
  geom_smooth(
    # dplyr:: is explicit because filter() also exists in stats.
    data = dplyr::filter(mpg, class == "subcompact"),
    # FALSE rather than F: F is an ordinary variable that can be reassigned.
    se = FALSE
  )

## #### ## 
## EGGS ## 
## #### ## 
# Exercise plots. se = FALSE drops the confidence band around each smoother;
# FALSE is spelled out because F is an ordinary variable that can be
# reassigned.

# colour = drv set in ggplot(): inherited by the points and the smoothers.
ggplot(mpg, aes(x = displ, y = hwy, colour = drv)) +
  geom_point() +
  geom_smooth(se = FALSE)

# Plain points with a single smoother.
ggplot(mpg, aes(x = displ, y = hwy)) +
  geom_point() +
  geom_smooth(se = FALSE)

# Plain points, one smoother per drv (grouping only, no colour).
ggplot(mpg, aes(x = displ, y = hwy)) +
  geom_point() +
  geom_smooth(aes(group = drv), se = FALSE)

# Points and smoothers both coloured by drv.
ggplot(mpg, aes(x = displ, y = hwy, colour = drv)) +
  geom_point() +
  geom_smooth(se = FALSE)

# Points coloured by drv, a single smoother.
ggplot(mpg, aes(x = displ, y = hwy)) +
  geom_point(aes(colour = drv)) +
  geom_smooth(se = FALSE)

# Points coloured by drv, smoothers distinguished by linetype.
ggplot(mpg, aes(x = displ, y = hwy)) +
  geom_point(aes(colour = drv)) +
  geom_smooth(aes(linetype = drv), se = FALSE)

# Points coloured by drv, no smoother.
ggplot(mpg, aes(x = displ, y = hwy)) +
  geom_point(aes(colour = drv))

## ## ## ## ## ## ## ## ## 
## Stat. Transformation ##
## ## ## ## ## ## ## ## ##

# Notes are print()ed so that they show up in the rendered output.
print("
@note: The algorithm used to calc. new values for a graph 
called stat (for statistical transformation)
Each geom uses a default stat argument 
@note: ?geom_bar (or other) and search for computed variables
      ")
## [1] "\n@note: The algorithm used to calc. new values for a graph \ncalled stat (for statistical transformation)\nEach geom uses a default stat argument \n@note: ?geom_bar (or other) and search for computed variables\n      "
# Geoms and stats can be used interchangeably,
# e.g. these two calls draw the same bar chart:
ggplot(diamonds, aes(x = cut)) +
  geom_bar()

ggplot(diamonds, aes(x = cut)) +
  stat_count()

print("
      @note: The above works because every stat
has a default geom and every geom has a def stat. 
 If want to set your own y for bar chart, use stat = 'identity'
      ")
## [1] "\n      @note: The above works because every stat\nhas a default geom and every geom has a def stat. \n If want to set your own y for bar chart, use stat = 'identity'\n      "
# Proportion bar chart
# @question: What does group do?
# NOTE(review): newer ggplot2 releases prefer after_stat(prop) over
# ..prop..; kept as written to match the version this script was run with.
ggplot(diamonds) +
  geom_bar(aes(x = cut, y = ..prop.., group = 1))

# Summarize the y values for every x: min, max and median of depth per cut.
# NOTE(review): newer ggplot2 releases name these arguments fun.min,
# fun.max and fun; confirm against the installed version before renaming.
ggplot(diamonds, aes(x = cut, y = depth)) +
  stat_summary(
    fun.ymin = min,
    fun.ymax = max,
    fun.y = median
  )

?stat_summary
?stat_bin

## #### ##
## EGGS ##
## #### ##

# The default geom associated with stat_summary() is geom_pointrange(), so
# the summary plot can also be written from the geom side.
ggplot(diamonds, aes(x = cut, y = depth)) +
  geom_pointrange(
    stat = "summary",
    fun.ymin = min,
    fun.ymax = max,
    fun.y = median
  )

?geom_line

ggplot(diamonds) +
  geom_bar(aes(x = cut, y = ..prop.., fill = cut))

# If group is not set to 1, then all the bars have prop == 1.
# geom_bar() assumes that the groups are equal to the x values,
# and the stat computes the counts within each group.

## ## ## ## ## 
## Pos. Adj ## 
## ## ## ## ## 

# A different variable can be used for fill in geom_bar();
# the bars are then split automatically.
ggplot(diamonds, aes(x = cut, fill = clarity)) +
  geom_bar()

# @note: Stacking is done automatically by the position adjustment
# (specified by the position argument).

# position = "identity": the bars overlap; more useful for points.
ggplot(diamonds, aes(x = cut, fill = clarity)) +
  geom_bar(position = "identity", alpha = 1 / 5)

ggplot(diamonds, aes(x = cut, colour = clarity)) +
  geom_bar(position = "identity", fill = NA)

# position = "fill" works like stacking, but makes the stacked bars the
# same height -> better for comparing groups.
ggplot(diamonds, aes(x = cut, fill = clarity)) +
  geom_bar(position = "fill")

# position = "dodge" places overlapping objects beside each other.
ggplot(diamonds, aes(x = cut, fill = clarity)) +
  geom_bar(position = "dodge")

# For scatterplots: values are rounded, so points overlap
# (is one dot a single point, or 1000 points rounded onto the same spot?).
# Use position = "jitter" to add random noise to the data; this reduces
# the chance of overlap because no two points will receive the same
# random noise.
ggplot(mpg, aes(x = displ, y = hwy)) +
  geom_point(position = "jitter")

ggplot(mpg, aes(x = displ, y = hwy)) +
  geom_point()

# geom_jitter() can also be used instead of geom_point().
# To look up a position, do ?position_x
# (x = dodge, identity, jitter, fill, etc.).

## ## ## ## 
## EGGS ## 
## ## ## ## 

# Add some jitter to the plot.
# position has to be given to the geom itself.
ggplot(mpg, aes(x = cty, y = hwy)) +
  geom_point(position = "jitter")

# How to control the jittering: width and height.
ggplot(mpg, aes(x = cty, y = hwy)) +
  geom_jitter(width = 10, height = 20)

# geom_count
ggplot(mpg, aes(x = cty, y = hwy)) +
  geom_count()

# geom_count() maps the number of observations at each location to the
# point size.

# Default position adjustment for geom_boxplot() -> dodge
?geom_boxplot
# Because of dodge, the boxes are placed side by side.
ggplot(mpg, aes(x = cyl, y = hwy, colour = class)) +
  geom_boxplot()
## Warning: position_dodge requires non-overlapping x intervals

# With position = "identity" instead, the boxes overlap.
ggplot(mpg, aes(x = cyl, y = hwy, colour = class)) +
  geom_boxplot(position = "identity")

## ## ## ## ## ## 
## COORD. SYS. ##
## ## ## ## ## ## 

# Flip it: coord_flip() applied to the boxplot of hwy by class.
ggplot(mpg, aes(x = class, y = hwy)) +
  geom_boxplot() +
  coord_flip()

## Polar coord
# Bar chart of cut counts, stored in `bar` so that different coordinate
# systems can be applied to the same plot below.
bar <- ggplot(diamonds) +
  geom_bar(
    aes(x = cut, fill = cut),
    # FALSE rather than F: F is an ordinary variable that can be reassigned.
    show.legend = FALSE,
    width = 1
  ) +
  theme(aspect.ratio = 1) +
  labs(x = NULL, y = NULL)
bar + coord_flip()

bar + coord_polar()

## #### ##
## EGGS ##
## #### ##

# Turn a bar chart into a pie chart:
# a single bar (x = factor(1)) holding the different cut counts, then polar
# coordinates with the angle mapped to y.
ggplot(diamonds, aes(x = factor(1), fill = cut)) +
  geom_bar(width = 1) +
  coord_polar(theta = "y")

# Help pages, looked up by topic name. The call form `?labs()` asks for the
# documentation matching a function call; the plain topic form is the
# direct way to open a function's help page.
?labs

?coord_quickmap